Notes

import os

from git import Repo
import dimcat as dc
import ms3
import pandas as pd
import plotly.express as px

from utils import CORPUS_COLOR_SCALE, STD_LAYOUT
# Directory holding the corpora; the CORPUS_PATH environment variable takes
# precedence over the default location.
CORPUS_PATH = os.getenv('CORPUS_PATH', "~/dcml_corpora")
CORPUS_PATH
'~/dcml_corpora'
# Report which commits and library versions this run is based on.
# `repo` is the git repository at CORPUS_PATH; `notebook_repo` is the repository
# found by searching upwards from the current working directory.
repo = Repo(CORPUS_PATH)
notebook_repo = Repo('.', search_parent_directories=True)
# Ask git for the top-level directory of the notebook repository.
notebook_repo_path = notebook_repo.git.rev_parse("--show-toplevel")
# Only the first seven characters of each commit hash are printed.
print(f"Notebook repository '{os.path.basename(notebook_repo_path)}' @ {notebook_repo.commit().hexsha[:7]}")
print(f"Data repo '{os.path.basename(CORPUS_PATH)}' @ {repo.commit().hexsha[:7]}")
print(f"dimcat version {dc.__version__}")
print(f"ms3 version {ms3.__version__}")
Notebook repository 'notebooks' @ f478afd
Data repo 'dcml_corpora' @ 3612b3b
dimcat version 0.3.0.post1.dev104+g9c57474
ms3 version 1.2.4.post0.dev2+gf30d960

Data loading

# Collect the selected corpus folders below CORPUS_PATH in a single dimcat Dataset.
dataset = dc.Dataset()
for folder in ('corelli', 'liszt_pelerinage'):
    print("Loading", folder)
    path = os.path.join(CORPUS_PATH, folder)
    dataset.load(directory=path)
# Display the loaded data as the cell output.
dataset.data
Loading corelli
Loading liszt_pelerinage
[default|all]
All corpora
-----------
View: This view is called 'default'. It
	- excludes fnames that are not contained in the metadata,
	- filters out file extensions requiring conversion (such as .xml), and
	- excludes review files and folders.

                      has   active   scores measures           notes        expanded
                 metadata     view detected detected parsed detected parsed detected parsed
corpus
corelli               yes  default      149      149    149      149    149      149    149
liszt_pelerinage      yes  default       19       19     19       19     19       19     19

1191/3375 files are excluded from this view.

1176 files have been excluded based on their subdir.
15 files have been excluded based on their file name.
# Fetch the metadata of the loaded data and report how many of the pieces it covers.
all_metadata = dataset.data.metadata()
print(f"Concatenated 'metadata.tsv' files cover {len(all_metadata)} of the {dataset.data.n_pieces} scores.")
# Show the first metadata row of every group on index level 0 as the cell output.
all_metadata.groupby(level=0).nth(0)
Concatenated 'metadata.tsv' files cover 168 of the 168 scores.
TimeSig KeySig last_mc last_mn length_qb last_mc_unfolded last_mn_unfolded length_qb_unfolded volta_mcs all_notes_qb ... staff_4_instrument score_integrity composed_source lyricist_text imslp musicbrainz viaf wikidata PDF typesetter
corpus
corelli 1: 4/4 1: -1 14 14 56.0 14 14 56.0 NaN 224.00 ... Keyboard NaN NaN NaN NaN NaN NaN NaN NaN NaN
liszt_pelerinage 1: 4/4 1: 0 97 97 388.0 97 97 388.0 NaN 1902.42 ... NaN Tom Schreyer OxfordMusicOnline NaN https://imslp.org/wiki/Ann%C3%A9es_de_p%C3%A8l... https://musicbrainz.org/work/5804701d-54a6-4c9... https://viaf.org/viaf/179020308/ https://www.wikidata.org/wiki/Q567462 https://imslp.org/wiki/Special:ReverseLookup/1... NaN

2 rows × 65 columns

# Run dimcat's IsAnnotatedFilter on the dataset and compare the number of IDs
# before and after filtering.
annotated = dc.IsAnnotatedFilter().process_data(dataset)
print(f"Before: {len(dataset.indices[()])} IDs, after filtering: {len(annotated.indices[()])}")
Before: 168 IDs, after filtering: 168

Choose here if you want to see stats for all or only for annotated scores.

# `selected` is the data the following statistics are computed on.
# Swap the two assignments to use the unfiltered dataset instead of the annotated one.
#selected = dataset
selected = annotated

Compute chronological order

# Keep only metadata rows with at least one label.
has_labels = all_metadata.label_count > 0
summary = all_metadata[has_labels]
print(f"Selected metadata rows cover {len(summary)} of the {len(sum((ixs for _, ixs in selected.iter_groups()), start=[]))} scores.")
# Order the groups on index level 0 by the (integer) mean of their `composed_end` values.
composed_end_means = summary.groupby(level=0).composed_end.mean()
mean_composition_years = composed_end_means.astype(int).sort_values()
chronological_order = mean_composition_years.index.to_list()
# Pair the groups, in that order, with the entries of the colour scale.
dataset_colors = {
    corpus: color
    for corpus, color in zip(chronological_order, CORPUS_COLOR_SCALE)
}
chronological_order
Selected metadata rows cover 168 of the 168 scores.
['corelli', 'liszt_pelerinage']
# Get the 'notes' facet of the selected data. Files are counted by grouping on
# the first two index levels (displayed below as `corpus` and `fname`).
all_notes = selected.get_facet('notes')
print(f"{len(all_notes.index)} notes over {len(all_notes.groupby(level=[0,1]))} files.")
all_notes.head()
129856 notes over 168 files.
mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice duration nominal_duration scalar tied tpc midi name octave chord_id tremolo gracenote
corpus fname interval
corelli op01n01a [0.0, 1.0) 1 1 0 1.0 0 0 4/4 3 1 1/4 1/4 1 <NA> -1 53 F3 3 8 NaN NaN
[0.0, 1.0) 1 1 0 1.0 0 0 4/4 4 1 1/4 1/4 1 <NA> -1 53 F3 3 14 NaN NaN
[0.0, 1.0) 1 1 0 1.0 0 0 4/4 2 1 1/4 1/4 1 <NA> 3 81 A5 5 4 NaN NaN
[0.0, 1.0) 1 1 0 1.0 0 0 4/4 1 1 1/4 1/4 1 <NA> 0 84 C6 6 0 NaN NaN
[1.0, 2.0) 1 1 1 1.0 1/4 1/4 4/4 3 1 1/4 1/4 1 <NA> 1 55 G3 3 9 NaN NaN
def weight_notes(nl, group_col='midi', precise=True):
    """Turn summed note durations into repeated occurrences of each group value.

    The `duration_qb` values of `nl` are summed per value of `group_col` and
    normalized so that the smallest positive sum corresponds to one occurrence.
    The result is suitable for plots that expect one row per observation.

    Args:
        nl: DataFrame with a `duration_qb` column and the column named by `group_col`.
        group_col: Column whose values are weighted (and repeated in the result).
        precise: If False, all weights are roughly halved, which shrinks the
            result (and compute time) at the cost of precision.

    Returns:
        A pandas Series in which every value of `group_col` is repeated according
        to its rounded weight, as produced by repeat_notes_according_to_weights().
    """
    summed_durations = nl.groupby(group_col).duration_qb.sum()
    # A group whose durations sum to 0 would make the minimum 0; normalizing would
    # then divide by zero and the resulting inf/NaN cannot be cast to integer counts.
    # Such groups carry no weight, so they are left out.
    summed_durations = summed_durations[summed_durations > 0]
    # normalize such that the shortest duration results in 1 occurrence
    summed_durations = summed_durations / summed_durations.min()
    if not precise:
        # This simple trick reduces compute time but also precision:
        # The rationale is to have the smallest value be slightly larger than 0.5 because
        # if it was exactly 0.5 it would be rounded down by repeat_notes_according_to_weights()
        summed_durations = summed_durations / 1.9999999
    return repeat_notes_according_to_weights(summed_durations)

def repeat_notes_according_to_weights(weights):
    """Repeat each index value of `weights` as often as its rounded weight.

    Args:
        weights: pandas Series mapping a value (its index, e.g. a pitch) to a
            numeric weight.

    Returns:
        A pandas Series containing every index value of `weights` repeated
        `round(weight)` times, in the order of `weights`. Values whose weight
        rounds to 0 do not occur.
    """
    counts = weights.round().astype(int)
    counts_reflecting_weights = []
    # Series.iteritems() is deprecated and no longer exists in pandas 2.0; items() is the replacement.
    for pitch, count in counts.items():
        counts_reflecting_weights.extend([pitch] * count)
    return pd.Series(counts_reflecting_weights)
# Display names for the corpus directory names.
corpus_names = {
    'corelli': 'Corelli Trio Sonatas',
    'mozart_piano_sonatas': 'Mozart Piano Sonatas',
    'ABC': 'Beethoven String Quartets',
    'beethoven_piano_sonatas': 'Beethoven Sonatas',
    'chopin_mazurkas': 'Chopin Mazurkas',
    'debussy_suite_bergamasque': 'Debussy Suite',
    'dvorak_silhouettes': "Dvořák Silhouettes",
    'grieg_lyric_pieces': "Grieg Lyric Pieces",
    'liszt_pelerinage': "Liszt Années",
    'medtner_tales': "Medtner Tales",
    'schumann_kinderszenen': "Schumann Kinderszenen",
    'tchaikovsky_seasons': "Tchaikovsky Seasons",
}
# Colours and chronological order, re-keyed by display name.
dataset_name_colors = {corpus_names[corp]: color for corp, color in dataset_colors.items()}
chronological_corpus_names = [corpus_names[corp] for corp in chronological_order]
# Add the display name to every note, derived from the first index level.
all_notes['dataset_name'] = all_notes.index.get_level_values(0).map(corpus_names)
grouped_notes = all_notes.groupby('dataset_name')
# Weight the MIDI pitches of each dataset by duration, then stack the results
# with the dataset name as an additional column.
weighted_per_dataset = [
    weight_notes(nl, 'midi', precise=False)
    for _, nl in grouped_notes
]
weighted_midi = pd.concat(weighted_per_dataset, keys=grouped_notes.groups.keys())
weighted_midi = weighted_midi.reset_index(level=0)
weighted_midi.columns = ['dataset', 'midi']
weighted_midi
/tmp/ipykernel_55525/2427151401.py:14: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for pitch, count in counts.iteritems():
/tmp/ipykernel_55525/2427151401.py:14: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for pitch, count in counts.iteritems():
dataset midi
0 Corelli Trio Sonatas 36
1 Corelli Trio Sonatas 36
2 Corelli Trio Sonatas 36
3 Corelli Trio Sonatas 36
4 Corelli Trio Sonatas 36
... ... ...
100505 Liszt Années 102
100506 Liszt Années 102
100507 Liszt Années 102
100508 Liszt Années 102
100509 Liszt Années 102

116659 rows × 2 columns

# Label the y axis with the C of every octave: MIDI 12, 24, ..., 96 shown as C0 ... C7.
yaxis = {
    'tickmode': 'array',
    'tickvals': list(range(12, 97, 12)),
    'ticktext': [f"C{octave}" for octave in range(8)],
    'gridcolor': 'lightgrey',
}
# One violin (with inner box plot) per dataset, in chronological order.
fig = px.violin(
    weighted_midi,
    x='dataset',
    y='midi',
    color='dataset',
    box=True,
    labels={
        'dataset': '',
        'midi': 'distribution of pitches by duration',
    },
    category_orders={'dataset': chronological_corpus_names},
    color_discrete_map=dataset_name_colors,
    width=1000,
    height=600,
)
fig.update_traces(spanmode='hard')  # do not extend beyond outliers
fig.update_layout(yaxis=yaxis, showlegend=False, **STD_LAYOUT)
fig.show()
# Total duration per `tpc` value over all loaded notes.
bar_data = all_notes.groupby('tpc').duration_qb.sum().reset_index()
# One tick for every integer between the smallest and largest observed tpc,
# labelled with the names ms3.fifths2name() returns for them.
x_values = list(range(bar_data.tpc.min(), bar_data.tpc.max()+1))
x_names = ms3.fifths2name(x_values)
fig = px.bar(
    bar_data,
    x='tpc',
    y='duration_qb',
    labels={
        'tpc': 'Named pitch class',
        'duration_qb': 'Duration in quarter notes',
    },
    color_discrete_sequence=CORPUS_COLOR_SCALE,
    width=1000,
    height=300,
)
fig.update_layout(**STD_LAYOUT)
fig.update_yaxes(gridcolor='lightgrey')
fig.update_xaxes(
    gridcolor='lightgrey',
    zerolinecolor='grey',
    tickmode='array',
    tickvals=x_values,
    ticktext=x_names,
    dtick=1,
    ticks='outside',
    tickcolor='black',
    minor={'dtick': 6, 'gridcolor': 'grey', 'showgrid': True},
)
fig.show()
# Total duration per dataset and tpc value, drawn as one facet per dataset.
scatter_data = all_notes.groupby(['dataset_name', 'tpc']).duration_qb.sum().reset_index()
fig = px.scatter(scatter_data, x='tpc', y='duration_qb', color='dataset_name',
                 labels=dict(
                     duration_qb='duration',
                     tpc='named pitch class',
                 ),
                 # The key has to name the column used for color/facets ('dataset_name');
                 # a key that matches no plotted column is ignored, leaving the facets unordered.
                 category_orders=dict(dataset_name=chronological_corpus_names),
                 color_discrete_map=dataset_name_colors,
                 facet_col='dataset_name', facet_col_wrap=3, facet_col_spacing=0.03,
                 width=1000, height=500,
                )
fig.update_traces(mode='lines+markers')
# Facet titles come as "dataset_name=<value>"; keep only the value.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(**STD_LAYOUT, showlegend=False)
fig.update_xaxes(gridcolor='lightgrey', zerolinecolor='lightgrey', tickmode='array', tickvals= [-12, -6, 0, 6, 12, 18],
    ticktext = ["Dbb", "Gb", "C", "F#", "B#", "E##"], visible=True, )
# matches=None gives every facet its own y axis range.
fig.update_yaxes(gridcolor='lightgrey', zeroline=False, matches=None, showticklabels=True)
fig.show()
# Bar chart of the summed durations per tpc value, coloured by dataset.
px.bar(scatter_data, x='tpc', y='duration_qb', color='dataset_name',
                 labels=dict(
                     duration_qb='duration',
                     tpc='named pitch class',
                 ),
                 # The key has to name the column used for color ('dataset_name');
                 # a key that matches no plotted column is ignored, leaving the datasets unordered.
                 category_orders=dict(dataset_name=chronological_corpus_names),
                 color_discrete_map=dataset_name_colors,
                 width=1000, height=500,
                )
# Rows with a tpc between -1 and 5 (inclusive) are counted as notes without accidental.
without_accidental_mask = bar_data.tpc.between(-1,5)
no_accidental = bar_data[without_accidental_mask].duration_qb.sum()
with_accidental = bar_data[~without_accidental_mask].duration_qb.sum()
entire = no_accidental + with_accidental
f"Fraction of note duration without accidental of the entire durations: {no_accidental} / {entire} = {no_accidental / entire}"
'Fraction of note duration without accidental of the entire durations: 67269.33143939394 / 98087.32314814815 = 0.685810656059931'

Notes and staves

# Count how many of the loaded notes fall on each value of the `staff` column.
print("Distribution of notes over staves:")
all_notes.staff.value_counts()
Distribution of notes over staves:
1    51729
2    43392
3    18602
4    16133
Name: staff, dtype: Int64
print("Distribution of notes over staves for all pieces with more than two staves\n")
# Walk through the notes piece by piece (first two index levels) and report
# only those pieces that have notes on a staff above 2.
for group, df in all_notes.groupby(level=[0,1]):
    if not (df.staff > 2).any():
        continue
    print(group)
    print(df.staff.value_counts().to_dict())
Distribution of notes over staves for all pieces with more than two staves

('corelli', 'op01n01a')
{1: 78, 3: 72, 4: 70, 2: 69}
('corelli', 'op01n01b')
{1: 259, 2: 237, 3: 143, 4: 116}
('corelli', 'op01n01c')
{3: 90, 4: 90, 2: 75, 1: 74}
('corelli', 'op01n01d')
{1: 280, 2: 229, 4: 199, 3: 171}
('corelli', 'op01n02a')
{2: 89, 1: 84, 4: 77, 3: 74}
('corelli', 'op01n02b')
{1: 142, 3: 135, 2: 127, 4: 111}
('corelli', 'op01n02c')
{2: 62, 1: 61, 3: 56, 4: 56}
('corelli', 'op01n02d')
{1: 223, 2: 214, 4: 203, 3: 181}
('corelli', 'op01n03a')
{1: 86, 2: 81, 3: 56, 4: 56}
('corelli', 'op01n03b')
{2: 319, 1: 315, 3: 314, 4: 232}
('corelli', 'op01n03c')
{2: 76, 1: 68, 3: 61}
('corelli', 'op01n03d')
{1: 332, 2: 262, 4: 167, 3: 133}
('corelli', 'op01n04a')
{1: 159, 2: 136, 3: 66, 4: 48}
('corelli', 'op01n04b')
{2: 55, 1: 53, 4: 47, 3: 46}
('corelli', 'op01n04c')
{2: 218, 1: 216, 4: 160, 3: 117}
('corelli', 'op01n04d')
{1: 157, 2: 149, 3: 135, 4: 122}
('corelli', 'op01n05a')
{1: 105, 2: 96, 3: 85, 4: 85}
('corelli', 'op01n05b')
{1: 240, 2: 190, 4: 183, 3: 180}
('corelli', 'op01n05c')
{1: 82, 3: 78, 4: 78, 2: 77}
('corelli', 'op01n05d')
{1: 210, 2: 202, 4: 129, 3: 129}
('corelli', 'op01n06a')
{1: 56, 2: 50, 3: 42, 4: 42}
('corelli', 'op01n06b')
{1: 266, 2: 238, 3: 191, 4: 189}
('corelli', 'op01n06c')
{1: 106, 2: 102, 4: 77, 3: 71}
('corelli', 'op01n06d')
{1: 192, 2: 189, 4: 139, 3: 112}
('corelli', 'op01n07a')
{1: 323, 2: 274, 3: 164, 4: 150}
('corelli', 'op01n07b')
{3: 50, 4: 50, 2: 49, 1: 45}
('corelli', 'op01n07c')
{1: 253, 2: 205, 3: 188, 4: 171}
('corelli', 'op01n08a')
{4: 52, 1: 51, 3: 49, 2: 44}
('corelli', 'op01n08b')
{1: 148, 2: 122, 3: 95, 4: 80}
('corelli', 'op01n08c')
{1: 122, 2: 117, 4: 108, 3: 97}
('corelli', 'op01n08d')
{1: 102, 2: 89, 4: 69, 3: 65}
('corelli', 'op01n09a')
{1: 125, 2: 124, 4: 67, 3: 61}
('corelli', 'op01n09b')
{1: 226, 2: 196, 4: 165, 3: 153}
('corelli', 'op01n09c')
{1: 89, 2: 82, 3: 78, 4: 78}
('corelli', 'op01n09d')
{1: 186, 2: 183, 3: 89, 4: 76}
('corelli', 'op01n10a')
{2: 41, 3: 39, 4: 39, 1: 37}
('corelli', 'op01n10b')
{1: 130, 4: 90, 2: 79, 3: 71}
('corelli', 'op01n10c')
{1: 145, 2: 134, 3: 122, 4: 120}
('corelli', 'op01n10d')
{1: 55, 2: 51, 3: 46, 4: 46}
('corelli', 'op01n10e')
{1: 250, 2: 229, 3: 149, 4: 135}
('corelli', 'op01n11a')
{2: 67, 1: 67, 3: 42, 4: 42}
('corelli', 'op01n11b')
{1: 225, 2: 192, 3: 89, 4: 81}
('corelli', 'op01n11c')
{2: 76, 1: 74, 3: 70, 4: 70}
('corelli', 'op01n11d')
{3: 133, 4: 133, 1: 97, 2: 85}
('corelli', 'op01n12a')
{3: 144, 4: 144, 1: 78, 2: 67}
('corelli', 'op01n12b')
{1: 335, 2: 285, 3: 71, 4: 71}
('corelli', 'op01n12c')
{2: 51, 1: 46, 3: 38, 4: 38}
('corelli', 'op01n12d')
{1: 358, 2: 323, 3: 305, 4: 244}
('corelli', 'op03n01a')
{3: 101, 4: 101, 1: 78, 2: 71}
('corelli', 'op03n01b')
{1: 261, 2: 220, 3: 168, 4: 144}
('corelli', 'op03n01c')
{1: 181, 2: 158, 3: 152, 4: 137}
('corelli', 'op03n01d')
{1: 183, 3: 173, 4: 170, 2: 157}
('corelli', 'op03n02a')
{3: 110, 4: 110, 1: 62, 2: 55}
('corelli', 'op03n02b')
{1: 155, 2: 145, 3: 113, 4: 113}
('corelli', 'op03n02c')
{2: 89, 1: 87, 3: 86, 4: 86}
('corelli', 'op03n02d')
{1: 202, 2: 156, 3: 147, 4: 141}
('corelli', 'op03n03a')
{3: 71, 4: 71, 1: 63, 2: 58}
('corelli', 'op03n03b')
{1: 113, 2: 110, 3: 55, 4: 55}
('corelli', 'op03n03c')
{1: 155, 2: 131, 3: 104, 4: 104}
('corelli', 'op03n03d')
{1: 268, 3: 249, 4: 246, 2: 222}
('corelli', 'op03n04a')
{3: 138, 1: 135, 4: 134, 2: 131}
('corelli', 'op03n04b')
{1: 170, 2: 156, 3: 102, 4: 100}
('corelli', 'op03n04c')
{1: 166, 3: 123, 4: 123, 2: 118}
('corelli', 'op03n04d')
{1: 215, 2: 207, 3: 178, 4: 178}
('corelli', 'op03n05a')
{3: 113, 4: 107, 1: 89, 2: 85}
('corelli', 'op03n05b')
{1: 264, 2: 204, 3: 195, 4: 169}
('corelli', 'op03n05c')
{1: 84, 2: 72, 3: 70, 4: 70}
('corelli', 'op03n05d')
{1: 228, 2: 191, 3: 163, 4: 129}
('corelli', 'op03n06a')
{1: 177, 2: 155, 3: 130, 4: 130}
('corelli', 'op03n06b')
{1: 53, 4: 52, 3: 47, 2: 47}
('corelli', 'op03n06c')
{1: 235, 2: 192, 4: 175, 3: 166}
('corelli', 'op03n06d')
{1: 181, 2: 160, 3: 131, 4: 131}
('corelli', 'op03n07a')
{4: 66, 1: 62, 3: 60, 2: 52}
('corelli', 'op03n07b')
{4: 135, 1: 126, 3: 112, 2: 103}
('corelli', 'op03n07c')
{1: 93, 3: 84, 4: 84, 2: 78}
('corelli', 'op03n07d')
{1: 139, 2: 108, 3: 107, 4: 107}
('corelli', 'op03n08a')
{1: 122, 3: 119, 4: 119, 2: 106}
('corelli', 'op03n08b')
{1: 266, 2: 232, 3: 188, 4: 186}
('corelli', 'op03n08c')
{3: 76, 4: 75, 2: 73, 1: 68}
('corelli', 'op03n08d')
{1: 331, 2: 323, 3: 268, 4: 232}
('corelli', 'op03n09a')
{1: 69, 2: 61, 3: 59, 4: 59}
('corelli', 'op03n09b')
{1: 127, 2: 115, 3: 113, 4: 111}
('corelli', 'op03n09c')
{1: 93, 2: 90, 3: 85, 4: 85}
('corelli', 'op03n09d')
{3: 182, 4: 164, 1: 117, 2: 110}
('corelli', 'op03n10a')
{1: 67, 2: 59, 3: 51, 4: 51}
('corelli', 'op03n10b')
{1: 271, 2: 237, 3: 234, 4: 232}
('corelli', 'op03n10c')
{3: 23, 4: 23, 1: 23, 2: 20}
('corelli', 'op03n10d')
{1: 271, 2: 233, 3: 71, 4: 71}
('corelli', 'op03n11a')
{1: 84, 2: 79, 3: 62, 4: 62}
('corelli', 'op03n11b')
{3: 250, 4: 233, 1: 170, 2: 161}
('corelli', 'op03n11c')
{2: 62, 1: 62, 3: 61, 4: 61}
('corelli', 'op03n11d')
{1: 112, 3: 97, 4: 97, 2: 91}
('corelli', 'op03n12a')
{1: 121, 2: 119, 3: 29, 4: 29}
('corelli', 'op03n12b')
{1: 148, 2: 142, 3: 41, 4: 41}
('corelli', 'op03n12c')
{3: 41, 4: 41, 1: 34, 2: 26}
('corelli', 'op03n12d')
{3: 316, 4: 182, 2: 96, 1: 95}
('corelli', 'op03n12e')
{1: 184, 2: 164, 3: 123, 4: 72}
('corelli', 'op03n12f')
{1: 301, 2: 273, 3: 235, 4: 171}
('corelli', 'op03n12g')
{3: 197, 4: 197, 1: 192, 2: 172}
('corelli', 'op04n01a')
{1: 84, 3: 72, 4: 72, 2: 69}
('corelli', 'op04n01b')
{3: 156, 1: 156, 2: 108}
('corelli', 'op04n01c')
{3: 82, 2: 79, 1: 72}
('corelli', 'op04n01d')
{3: 258, 4: 258, 1: 112, 2: 91}
('corelli', 'op04n02a')
{3: 131, 4: 131, 2: 76, 1: 74}
('corelli', 'op04n02b')
{1: 135, 3: 132, 2: 115}
('corelli', 'op04n02c')
{3: 11, 2: 11, 1: 11}
('corelli', 'op04n02d')
{1: 131, 2: 113, 3: 111, 4: 111}
('corelli', 'op04n03a')
{1: 104, 2: 88, 3: 80, 4: 80}
('corelli', 'op04n03b')
{1: 263, 3: 113, 4: 113, 2: 107}
('corelli', 'op04n03c')
{3: 84, 4: 84, 1: 58, 2: 47}
('corelli', 'op04n03d')
{1: 140, 3: 122, 4: 122, 2: 98}
('corelli', 'op04n04a')
{3: 91, 4: 91, 1: 62, 2: 60}
('corelli', 'op04n04b')
{1: 116, 2: 94, 3: 87, 4: 87}
('corelli', 'op04n04c')
{3: 116, 4: 116, 1: 86, 2: 86}
('corelli', 'op04n04d')
{1: 304, 3: 178, 2: 166}
('corelli', 'op04n05a')
{3: 112, 4: 112, 1: 95, 2: 86}
('corelli', 'op04n05b')
{3: 213, 4: 213, 1: 117, 2: 107}
('corelli', 'op04n05c')
{1: 89, 2: 84, 3: 75, 4: 75}
('corelli', 'op04n05d')
{1: 40, 2: 34, 3: 34, 4: 34}
('corelli', 'op04n06a')
{3: 39, 1: 28, 2: 20}
('corelli', 'op04n06b')
{1: 91, 2: 84, 3: 44}
('corelli', 'op04n06c')
{1: 18, 2: 16, 3: 15}
('corelli', 'op04n06d')
{1: 116, 2: 93, 3: 76}
('corelli', 'op04n06e')
{1: 22, 2: 21, 3: 20}
('corelli', 'op04n06f')
{1: 138, 2: 87, 3: 66, 4: 66}
('corelli', 'op04n06g')
{1: 226, 2: 139, 3: 105, 4: 105}
('corelli', 'op04n07a')
{1: 98, 3: 85, 4: 85, 2: 80}
('corelli', 'op04n07b')
{1: 111, 3: 99, 2: 93}
('corelli', 'op04n07c')
{3: 12, 2: 12, 1: 12}
('corelli', 'op04n07d')
{3: 164, 4: 164, 2: 59, 1: 49}
('corelli', 'op04n07e')
{1: 309, 3: 88, 4: 88, 2: 78}
('corelli', 'op04n08a')
{1: 90, 2: 85, 3: 80, 4: 80}
('corelli', 'op04n08b')
{3: 342, 4: 342, 2: 87, 1: 85}
('corelli', 'op04n08c')
{3: 85, 4: 85, 2: 47, 1: 47}
('corelli', 'op04n09a')
{2: 111, 1: 105, 3: 78, 4: 78}
('corelli', 'op04n09b')
{1: 175, 2: 147, 3: 120, 4: 120}
('corelli', 'op04n09c')
{1: 45, 3: 39, 4: 39, 2: 37}
('corelli', 'op04n09d')
{1: 258, 2: 220, 4: 143, 3: 140}
('corelli', 'op04n10a')
{3: 4, 1: 4, 2: 4}
('corelli', 'op04n10b')
{1: 304, 3: 186, 2: 163}
('corelli', 'op04n10c')
{2: 8, 3: 7, 1: 7}
('corelli', 'op04n10d')
{3: 51, 4: 51, 1: 45, 2: 38}
('corelli', 'op04n10e')
{1: 162, 3: 140, 4: 140, 2: 126}
('corelli', 'op04n11a')
{3: 153, 4: 153, 1: 109, 2: 105}
('corelli', 'op04n11b')
{3: 238, 4: 238, 1: 234, 2: 175}
('corelli', 'op04n11c')
{3: 268, 4: 268, 1: 121, 2: 116}
('corelli', 'op04n12a')
{1: 90, 3: 80, 4: 80, 2: 75}
('corelli', 'op04n12b')
{1: 296, 3: 110, 4: 110, 2: 105}
('corelli', 'op04n12c')
{1: 207, 2: 61, 3: 58, 4: 58}
('liszt_pelerinage', '161.04_Sonetto_47_del_Petrarca')
{1: 1076, 2: 628, 3: 42, 4: 29}
('liszt_pelerinage', '161.07_Apres_une_lecture_du_Dante')
{1: 6638, 2: 5181, 3: 50}
('liszt_pelerinage', '162.01_Gondoliera')
{3: 1745, 4: 955}
# Per piece, count the notes on each staff above 2.
notes_above_staff_two = all_notes[all_notes.staff > 2]
notes_above_staff_two.groupby(level=[0,1]).staff.value_counts()
corpus            fname                              staff
corelli           op01n01a                           3          72
                                                     4          70
                  op01n01b                           3         143
                                                     4         116
                  op01n01c                           3          90
                                                              ...
liszt_pelerinage  161.04_Sonetto_47_del_Petrarca     3          42
                                                     4          29
                  161.07_Apres_une_lecture_du_Dante  3          50
                  162.01_Gondoliera                  3        1745
                                                     4         955
Name: staff, Length: 287, dtype: int64